load packages
library(ggplot2)
library(data.table)
library(stringr)
library(dplyr)
library(tibble)
library(tidyr)
library(rprojroot)
library(Rtsne)
library(cowplot)
set paths and filenames
### files
long_table=sprintf("%s/data/mp142_TGVG1.1_MPA4_combined_abundance_table_longform1.tsv", find_rstudio_root_file())
metadata_table=sprintf("%s/data/some_teddy_MP142_metadata2.all_samples1.delivery.csv", find_rstudio_root_file())
load long data table, filter by SGBs detected in over 100 samples
long_dt <- fread(sprintf("%s", long_table), sep = "\t", header = T) %>%
select(sampleID, rel_abundance, lineage) %>%
mutate(kingdom = case_when(grepl("k__Bac", lineage) ~ "Bacteria",
grepl("k__Vir", lineage) ~ "Virus",
grepl("k__Ar", lineage) ~ "Archea",
grepl("k__Euk", lineage) ~ "Eukaryota",
TRUE ~ "other")) %>%
group_by(lineage) %>%
filter(n() >= 100) %>%
ungroup()
wide_bac_sp_dt <- long_dt %>%
filter(kingdom == "Bacteria") %>%
select(-kingdom) %>%
pivot_wider(names_from = lineage, values_from = rel_abundance, values_fill = 0)
sampleID_l <- wide_bac_sp_dt$sampleID
wide_bac_sp_dt <- wide_bac_sp_dt %>% select(-sampleID)
wide_vir_sp_dt <- long_dt %>%
filter(kingdom == "Virus") %>%
select(-kingdom) %>%
pivot_wider(names_from = lineage, values_from = rel_abundance, values_fill = 0)
sampleID_x <- wide_vir_sp_dt$sampleID
wide_vir_sp_dt <- wide_vir_sp_dt %>% select(-sampleID)
load metadata table
meta_dt <- fread(sprintf("%s", metadata_table), sep = ",", header = T) %>%
select(-V1) %>%
mutate(sample = as.character(sample))
run bacteria PCA
bac_prcomp1 <- prcomp(wide_bac_sp_dt)
names(bac_prcomp1)
[1] "sdev" "rotation" "center" "scale" "x"
#plot(bac_prcomp1$x[,1:2], pch = ".")
emb_bac <- Rtsne::Rtsne(bac_prcomp1$x[,1:20], perplexity = 20)
embb_bac <- emb_bac$Y
rownames(embb_bac) <- sampleID_l
embb_bac_dt <- as.data.frame(embb_bac) %>%
setDT(., keep.rownames = "sampleID")
embb_bac_meta_dt <- merge(embb_bac_dt, meta_dt, by.x = "sampleID", by.y = "sample") %>%
mutate(Country = case_when(
country == 1 ~ "USA",
country == 2 ~ "FIN",
country == 3 ~ "GER",
country == 4 ~ "SWE",
TRUE ~ "other"
))
bac_dayp <- embb_bac_meta_dt %>%
ggplot(aes(x=V1, y=V2, color=age_days, shape = Country)) +
geom_point(size = 1.3, alpha = 0.5) +
scale_color_gradient2(low = "maroon", mid = "grey", high = "#3F3F7B",
midpoint = 500, limits = c(0,1400), na.value = "black", name = "Day of Life") +
scale_shape_manual(values=c(15, 16, 17, 18)) +
theme_bw() +
labs(x = "tSNE-1", y = "tSNE-2", title = "tSNE of Bacteria in TEDDY samples")
bac_dayp

bac_T1Dp <- embb_bac_meta_dt %>%
ggplot(aes(x=V1, y=V2, color=T1D, shape = Country)) +
geom_point(size = 1.3, alpha = 0.5) +
scale_color_manual(values=c("#8CBEB1", "red")) +
scale_shape_manual(values=c(15, 16, 17, 18)) +
theme_bw() +
labs(x = "tSNE-1", y = "tSNE-2", title = "tSNE of Bacteria in TEDDY samples")
bac_T1Dp

run virus PCA
vir_prcomp1 <- prcomp(wide_vir_sp_dt)
#plot(vir_prcomp1$x[,1:2], pch = ".")
emb_vir <- Rtsne::Rtsne(vir_prcomp1$x[,1:20], perplexity = 20)
emb_vir <- emb_vir$Y
rownames(emb_vir) <- sampleID_x
embb_vir_dt <- as.data.frame(emb_vir) %>%
setDT(., keep.rownames = "sampleID")
embb_vir_meta_dt <- merge(embb_vir_dt, meta_dt, by.x = "sampleID", by.y = "sample") %>%
mutate(Country = case_when(
country == 1 ~ "USA",
country == 2 ~ "FIN",
country == 3 ~ "GER",
country == 4 ~ "SWE",
TRUE ~ "other"
))
vir_dayp <- embb_vir_meta_dt %>%
ggplot(aes(x=V1, y=V2, color=age_days, shape = Country)) +
geom_point(size = 1.3, alpha = 0.5) +
scale_color_gradient2(low = "maroon", mid = "grey", high = "#3F3F7B",
midpoint = 500, limits = c(0,1400), na.value = "black", name = "Day of Life") +
scale_shape_manual(values=c(15, 16, 17, 18)) +
theme_bw() +
labs(x = "tSNE-1", y = "tSNE-2", title = "tSNE of Viruses in TEDDY samples")
vir_dayp

vir_T1Dp <- embb_vir_meta_dt %>%
ggplot(aes(x=V1, y=V2, color=T1D, shape = Country)) +
geom_point(size = 1.3, alpha = 0.5) +
scale_color_manual(values=c("#8CBEB1", "red")) +
scale_shape_manual(values=c(15, 16, 17, 18)) +
theme_bw() +
labs(x = "tSNE-1", y = "tSNE-2", title = "tSNE of Viruses in TEDDY samples")
vir_T1Dp

NA
NA
combine and save
tSNE_combp <- plot_grid(bac_dayp, bac_T1Dp, vir_dayp, vir_T1Dp, align = "h", nrow = 2)
ggsave(tSNE_combp, file = sprintf("%s/charts/tSNE_bacteria_vs_virus_all_samples.pdf", find_rstudio_root_file()), width = 8, height = 8)
LS0tCnRpdGxlOiAiTWFrZSB0U05FIGZvciBiYWN0ZXJpYSBhbmQgdmlydXMgU0dCcywgY29sb3IgYnkgbWV0YWRhdGEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmxvYWQgcGFja2FnZXMKYGBge3J9CmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KHN0cmluZ3IpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGliYmxlKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KHJwcm9qcm9vdCkKbGlicmFyeShSdHNuZSkKbGlicmFyeShjb3dwbG90KQoKYGBgCgpzZXQgcGF0aHMgYW5kIGZpbGVuYW1lcwpgYGB7cn0KIyMjIGZpbGVzCmxvbmdfdGFibGU9c3ByaW50ZigiJXMvZGF0YS9tcDE0Ml9UR1ZHMS4xX01QQTRfY29tYmluZWRfYWJ1bmRhbmNlX3RhYmxlX2xvbmdmb3JtMS50c3YiLCBmaW5kX3JzdHVkaW9fcm9vdF9maWxlKCkpCm1ldGFkYXRhX3RhYmxlPXNwcmludGYoIiVzL2RhdGEvc29tZV90ZWRkeV9NUDE0Ml9tZXRhZGF0YTIuYWxsX3NhbXBsZXMxLmRlbGl2ZXJ5LmNzdiIsIGZpbmRfcnN0dWRpb19yb290X2ZpbGUoKSkKCmBgYAoKbG9hZCBsb25nIGRhdGEgdGFibGUsIGZpbHRlciBieSBTR0JzIGRldGVjdGVkIGluIG92ZXIgMTAwIHNhbXBsZXMKYGBge3J9CmxvbmdfZHQgPC0gZnJlYWQoc3ByaW50ZigiJXMiLCBsb25nX3RhYmxlKSwgc2VwID0gIlx0IiwgaGVhZGVyID0gVCkgJT4lCiAgc2VsZWN0KHNhbXBsZUlELCByZWxfYWJ1bmRhbmNlLCBsaW5lYWdlKSAlPiUKICBtdXRhdGUoa2luZ2RvbSA9IGNhc2Vfd2hlbihncmVwbCgia19fQmFjIiwgbGluZWFnZSkgfiAiQmFjdGVyaWEiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICBncmVwbCgia19fVmlyIiwgbGluZWFnZSkgfiAiVmlydXMiLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdyZXBsKCJrX19BciIsIGxpbmVhZ2UpIH4gIkFyY2hlYSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZ3JlcGwoImtfX0V1ayIsIGxpbmVhZ2UpIH4gIkV1a2FyeW90YSIsCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgVFJVRSB+ICJvdGhlciIpKSAlPiUKICBncm91cF9ieShsaW5lYWdlKSAlPiUKICBmaWx0ZXIobigpID49IDEwMCkgJT4lCiAgdW5ncm91cCgpCgp3aWRlX2JhY19zcF9kdCA8LSBsb25nX2R0ICU+JQogIGZpbHRlcihraW5nZG9tID09ICJCYWN0ZXJpYSIpICU+JQogIHNlbGVjdCgta2luZ2RvbSkgJT4lCiAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IGxpbmVhZ2UsIHZhbHVlc19mcm9tID0gcmVsX2FidW5kYW5jZSwgdmFsdWVzX2ZpbGwgPSAwKQoKc2FtcGxlSURfbCA8LSB3aWRlX2JhY19zcF9kdCRzYW1wbGVJRAoKCndpZGVfYmFjX3NwX2R0IDwtIHdpZGVfYmFjX3NwX2R0ICU+JSBzZWxlY3QoLXNhbXBsZUlEKQoKCndpZGVfdmlyX3NwX2R0IDwtIGxvbmdfZHQgJT4lCiAgZmlsdGVyKGtpbmdkb20gPT0gIlZpcnVzIikgJT4lCiAgc2VsZWN0KC1raW5nZG9tKSAlPiUKICBwaXZvdF93aWRlcihuYW1lc19mcm9tID0gbGluZWFnZSwgdmFsdWVzX2Zyb20gPSByZWxfYWJ1bmRhbmNlLCB2YWx1ZXNfZmlsbCA9IDApCgpzYW1wbGVJRF94IDwtIHdpZGVfdmlyX3NwX2R0JHNhbXBsZUlECgoKd2lkZV92aXJfc3BfZHQgPC0gd2lkZV92aXJfc3BfZHQgJT4lIHNlbGVjdCgtc2FtcGxlSUQpCmBgYAoKbG9hZCBtZXRhZGF0YSB0YWJsZQpgYGB7cn0KbWV0YV9kdCA8LSBmcmVhZChzcHJpbnRmKCIlcyIsIG1ldGFkYXRhX3RhYmxlKSwgc2VwID0gIiwiLCBoZWFkZXIgPSBUKSAlPiUKICBzZWxlY3QoLVYxKSAlPiUKICBtdXRhdGUoc2FtcGxlID0gYXMuY2hhcmFjdGVyKHNhbXBsZSkpCmBgYAoKcnVuIGJhY3RlcmlhIFBDQQpgYGB7cn0KYmFjX3ByY29tcDEgPC0gcHJjb21wKHdpZGVfYmFjX3NwX2R0KQpuYW1lcyhiYWNfcHJjb21wMSkKI3Bsb3QoYmFjX3ByY29tcDEkeFssMToyXSwgcGNoID0gIi4iKQoKZW1iX2JhYyA8LSBSdHNuZTo6UnRzbmUoYmFjX3ByY29tcDEkeFssMToyMF0sIHBlcnBsZXhpdHkgPSAyMCkKCmVtYmJfYmFjIDwtIGVtYl9iYWMkWQoKcm93bmFtZXMoZW1iYl9iYWMpIDwtIHNhbXBsZUlEX2wKCmVtYmJfYmFjX2R0IDwtIGFzLmRhdGEuZnJhbWUoZW1iYl9iYWMpICU+JQogIHNldERUKC4sIGtlZXAucm93bmFtZXMgPSAic2FtcGxlSUQiKQoKZW1iYl9iYWNfbWV0YV9kdCA8LSBtZXJnZShlbWJiX2JhY19kdCwgbWV0YV9kdCwgYnkueCA9ICJzYW1wbGVJRCIsIGJ5LnkgPSAic2FtcGxlIikgJT4lCiAgbXV0YXRlKENvdW50cnkgPSBjYXNlX3doZW4oCiAgICAgICAgIGNvdW50cnkgPT0gMSB+ICJVU0EiLAogICAgICAgICBjb3VudHJ5ID09IDIgfiAiRklOIiwKICAgICAgICAgY291bnRyeSA9PSAzIH4gIkdFUiIsCiAgICAgICAgIGNvdW50cnkgPT0gNCB+ICJTV0UiLAogICAgICAgICBUUlVFIH4gIm90aGVyIgogICAgICAgKSkKCmJhY19kYXlwIDwtIGVtYmJfYmFjX21ldGFfZHQgJT4lCiAgZ2dwbG90KGFlcyh4PVYxLCB5PVYyLCBjb2xvcj1hZ2VfZGF5cywgc2hhcGUgPSBDb3VudHJ5KSkgKwogIGdlb21fcG9pbnQoc2l6ZSA9IDEuMywgYWxwaGEgPSAwLjUpICsKICBzY2FsZV9jb2xvcl9ncmFkaWVudDIobG93ID0gIm1hcm9vbiIsIG1pZCA9ICJncmV5IiwgaGlnaCA9ICIjM0YzRjdCIiwgCiAgICAgICAgICAgICAgICAgICAgICAgIG1pZHBvaW50ID0gNTAwLCBsaW1pdHMgPSBjKDAsMTQwMCksIG5hLnZhbHVlID0gImJsYWNrIiwgbmFtZSA9ICJEYXkgb2YgTGlmZSIpICsKICBzY2FsZV9zaGFwZV9tYW51YWwodmFsdWVzPWMoMTUsIDE2LCAxNywgMTgpKSArCiAgdGhlbWVfYncoKSArCiAgbGFicyh4ID0gInRTTkUtMSIsIHkgPSAidFNORS0yIiwgdGl0bGUgPSAidFNORSBvZiBCYWN0ZXJpYSBpbiBURUREWSBzYW1wbGVzIikKYmFjX2RheXAKCmJhY19UMURwIDwtIGVtYmJfYmFjX21ldGFfZHQgJT4lCiAgZ2dwbG90KGFlcyh4PVYxLCB5PVYyLCBjb2xvcj1UMUQsIHNoYXBlID0gQ291bnRyeSkpICsKICBnZW9tX3BvaW50KHNpemUgPSAxLjMsIGFscGhhID0gMC41KSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcz1jKCIjOENCRUIxIiwgInJlZCIpKSArCiAgc2NhbGVfc2hhcGVfbWFudWFsKHZhbHVlcz1jKDE1LCAxNiwgMTcsIDE4KSkgKwogIHRoZW1lX2J3KCkgKwogIGxhYnMoeCA9ICJ0U05FLTEiLCB5ID0gInRTTkUtMiIsIHRpdGxlID0gInRTTkUgb2YgQmFjdGVyaWEgaW4gVEVERFkgc2FtcGxlcyIpCmJhY19UMURwCgpgYGAKCgpydW4gdmlydXMgUENBCmBgYHtyfQp2aXJfcHJjb21wMSA8LSBwcmNvbXAod2lkZV92aXJfc3BfZHQpCgojcGxvdCh2aXJfcHJjb21wMSR4WywxOjJdLCBwY2ggPSAiLiIpCgplbWJfdmlyIDwtIFJ0c25lOjpSdHNuZSh2aXJfcHJjb21wMSR4WywxOjIwXSwgcGVycGxleGl0eSA9IDIwKQoKZW1iX3ZpciA8LSBlbWJfdmlyJFkKCnJvd25hbWVzKGVtYl92aXIpIDwtIHNhbXBsZUlEX3gKCmVtYmJfdmlyX2R0IDwtIGFzLmRhdGEuZnJhbWUoZW1iX3ZpcikgJT4lCiAgc2V0RFQoLiwga2VlcC5yb3duYW1lcyA9ICJzYW1wbGVJRCIpCgplbWJiX3Zpcl9tZXRhX2R0IDwtIG1lcmdlKGVtYmJfdmlyX2R0LCBtZXRhX2R0LCBieS54ID0gInNhbXBsZUlEIiwgYnkueSA9ICJzYW1wbGUiKSAlPiUKICBtdXRhdGUoQ291bnRyeSA9IGNhc2Vfd2hlbigKICAgICAgICAgY291bnRyeSA9PSAxIH4gIlVTQSIsCiAgICAgICAgIGNvdW50cnkgPT0gMiB+ICJGSU4iLAogICAgICAgICBjb3VudHJ5ID09IDMgfiAiR0VSIiwKICAgICAgICAgY291bnRyeSA9PSA0IH4gIlNXRSIsCiAgICAgICAgIFRSVUUgfiAib3RoZXIiCiAgICAgICApKQoKdmlyX2RheXAgPC0gZW1iYl92aXJfbWV0YV9kdCAlPiUKICBnZ3Bsb3QoYWVzKHg9VjEsIHk9VjIsIGNvbG9yPWFnZV9kYXlzLCBzaGFwZSA9IENvdW50cnkpKSArCiAgZ2VvbV9wb2ludChzaXplID0gMS4zLCBhbHBoYSA9IDAuNSkgKwogIHNjYWxlX2NvbG9yX2dyYWRpZW50Mihsb3cgPSAibWFyb29uIiwgbWlkID0gImdyZXkiLCBoaWdoID0gIiMzRjNGN0IiLCAKICAgICAgICAgICAgICAgICAgICAgICAgbWlkcG9pbnQgPSA1MDAsIGxpbWl0cyA9IGMoMCwxNDAwKSwgbmEudmFsdWUgPSAiYmxhY2siLCBuYW1lID0gIkRheSBvZiBMaWZlIikgKwogIHNjYWxlX3NoYXBlX21hbnVhbCh2YWx1ZXM9YygxNSwgMTYsIDE3LCAxOCkpICsKICB0aGVtZV9idygpICsKICBsYWJzKHggPSAidFNORS0xIiwgeSA9ICJ0U05FLTIiLCB0aXRsZSA9ICJ0U05FIG9mIFZpcnVzZXMgaW4gVEVERFkgc2FtcGxlcyIpCnZpcl9kYXlwCgp2aXJfVDFEcCA8LSBlbWJiX3Zpcl9tZXRhX2R0ICU+JQogIGdncGxvdChhZXMoeD1WMSwgeT1WMiwgY29sb3I9VDFELCBzaGFwZSA9IENvdW50cnkpKSArCiAgZ2VvbV9wb2ludChzaXplID0gMS4zLCBhbHBoYSA9IDAuNSkgKwogIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXM9YygiIzhDQkVCMSIsICJyZWQiKSkgKwogIHNjYWxlX3NoYXBlX21hbnVhbCh2YWx1ZXM9YygxNSwgMTYsIDE3LCAxOCkpICsKICB0aGVtZV9idygpICsKICBsYWJzKHggPSAidFNORS0xIiwgeSA9ICJ0U05FLTIiLCB0aXRsZSA9ICJ0U05FIG9mIFZpcnVzZXMgaW4gVEVERFkgc2FtcGxlcyIpCnZpcl9UMURwCgoKYGBgCgpjb21iaW5lIGFuZCBzYXZlCmBgYHtyfQp0U05FX2NvbWJwIDwtIHBsb3RfZ3JpZChiYWNfZGF5cCwgYmFjX1QxRHAsIHZpcl9kYXlwLCB2aXJfVDFEcCwgYWxpZ24gPSAiaCIsIG5yb3cgPSAyKQoKZ2dzYXZlKHRTTkVfY29tYnAsIGZpbGUgPSBzcHJpbnRmKCIlcy9jaGFydHMvdFNORV9iYWN0ZXJpYV92c192aXJ1c19hbGxfc2FtcGxlcy5wZGYiLCBmaW5kX3JzdHVkaW9fcm9vdF9maWxlKCkpLCB3aWR0aCA9IDgsIGhlaWdodCA9IDgpCgpgYGAKCgoKCgoKCg==